========================================================================= 
Log Path: /Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics/log/Building the Dataset:clean.log 
Program Path: /Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics/Building the Dataset:clean.R 
Working Directory: /Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics 
User Name: benjaminleo 
R Version: 4.3.1 (2023-06-16) 
Machine: Benjamins-MacBook-Air-4.local arm64 
Operating System: Darwin 24.2.0 Darwin Kernel Version 24.2.0: Fri Dec 6 18:40:14 PST 2024; root:xnu-11215.61.5~2/RELEASE_ARM64_T8103 
Base Packages: stats graphics grDevices utils datasets methods base 
Other Packages: maps_3.4.2 janitor_2.2.0 wbstats_1.0.4 stevemisc_1.6.0 peacesciencer_1.1.0 bit_4.0.5 dataverse_0.3.13 countrycode_1.5.0 logr_1.3.8 common_1.1.3 foreign_0.8-86 texreg_1.39.3 margins_0.3.26 haven_2.5.4 MatchIt_4.5.5 BalanceCheck_0.2 optmatch_0.10.7 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 readr_2.1.4 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0 fcuk_0.1.21 
Log Start Time: 2025-01-15 10:14:14.256399 
========================================================================= 

> #######------PGMs and Conflict Data Construction------#######
> 
> #This file is only for creating the data frame 
>     #and visualizations of the data (maps and whatnot)
> 
> # Building the DF ---------------------------------------------------------
> rm(list = ls()) 
> library(dplyr)
> library(tidyr)
> library(countrycode)
> #Set working directory
> getwd()
> #dir.create("/Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics")
> setwd("/Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics")
> 
> #Loading the PGM dataset 
> #install.packages("dataverse")
> library(dataverse)
> #install.packages("bit")
> library(bit)
> Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
> pgms <- get_dataframe_by_name(
>   filename = "pgmdv2_countryyear.tab",
>   dataset = "doi:10.7910/DVN/YK8L4I",
>   server = "dataverse.harvard.edu"
> )
> View(pgms)
> 
> #Using peacescienceR to create a dataframe with most of the control variables  
> #install.packages("peacesciencer")
> library(peacesciencer)
> cow_df <- create_stateyears(system = "cow") 
> View(cow_df)
> 
> #adding in data 
> cow_df <- add_democracy(cow_df)
> cow_df <- add_rugged_terrain(cow_df)
> cow_df <- add_cow_majors(cow_df)
> cow_df <- add_sdp_gdp(cow_df)
> cow_df <- add_creg_fractionalization(cow_df)
> cow_df <- add_nmc(cow_df)
> cow_df <- add_gwcode_to_cow(cow_df)
> cow_df <- add_ucdp_acd(cow_df, c("intrastate","||"))
> 
> 
> #Time since conflict variable 
> library(stevemisc)
> cow_df <- ps_btscs(cow_df, ucdpongoing, year, ccode)
> summary(cow_df$spell)
> 
> cow_df <- cow_df |>
>   rename(ts_war = spell)
> 
> #Reducing the temporal zone to the apropriate time period 
> cow_df <- cow_df |>
>   filter(year >= 1981 & year <= 2014)
> cow_df <- unique(cow_df)
> 
> #Taking a look at this dataframe 
> summary(cow_df)
> 
> 
> #Loading in coup data  
> coups <- read.delim('http://www.uky.edu/~clthyn2/coup_data/powell_thyne_coups_final.txt')
> View(coups)
> # Assuming coups is your dataframe
> coups <- coups|>
>   mutate(coup_attempt = 1)
> coups <- select(coups, -c("ccode_gw", 
>                           "ccode_polity", 
>                           "month", 
>                           "day", 
>                           "version"))
> #correct dataframe 
> coups <- coups |>
>   filter(year >= 1981 & year <= 2014)
> 
> #Need to get this to country year 
> coups <- distinct(coups, ccode, year, .keep_all = TRUE)
> 
> 
> #EPR data - better data on ethnic exlusion than CREG
> #Cant use Selway data, ends in 1999 - kills 15 years of obs 
> epr <- read.delim("https://icr.ethz.ch/data/epr/core/EPR-2021.txt")
> View(epr)
> 
> #I need to create an indicator for the level of ethnic inequality by state
> #this is going to be a bit of a pain
> epr <- epr |>
>   rename(country = statename)
> 
> #need to first make group year the UOA
> expanded_epr <- epr |>
>   group_by(group, gwid) |>
>   summarise(from = min(from), to = max(to)) |>
>   uncount(to - from + 1, .id = "index") |>
>   mutate(year = from + index - 1) |>
>   select(-from, -to, -index) |>
>   left_join(epr, by = c("group", "gwid"))
> 
> View(expanded_epr)
> 
> 
> #Now we need to sort by country year, and add indicators for ethnic exclusion
> # Create the new variable ethnic_exclus based on status
> expanded_epr$ethnic_exclus <- ifelse(expanded_epr$status %in% c("DISCRIMINATED", "POWERLESS"), 1, 0)
> hist(expanded_epr$ethnic_exclus)
> 
> 
> # Group by country and year and collapse observations
> collapsed_epr <- expanded_epr |>
>   group_by(gwid, year) |>
>   summarise(
>     ethnic_exclus = as.numeric(any(ethnic_exclus == 1)),
>     total_size = sum(size),
>     num_groups = sum(ethnic_exclus)
>   )
> 
> 
> #I need an interaction term between presence and group size 
> collapsed_epr$size_exclu_int <- collapsed_epr$num_groups * collapsed_epr$total_size
> hist(collapsed_epr$size_exclu_int)
> 
> 
> #checking everything out 
> hist(collapsed_epr$ethnic_exclus)
> hist(collapsed_epr$num_groups)
> rm(expanded_epr, epr)
> View(collapsed_epr)
> 
> 
> 
> #set the right timeframe 
> collapsed_epr <- collapsed_epr |>
>   filter(year < 2015,
>          year > 1980)
> 
> #good to go 
> 
> #World Bank database to get some of the control variables that arent in psr
> #install.packages("wbstats")
> library("wbstats")
> #install.packages("janitor")
> library("janitor")
> 
> #getting the data 
> #oil <- wb_search(pattern = "Oil rents" , fields = "indicator")
> #print(oil)
> #the code for Oil rents (% of GDP) is NY.GDP.PETR.RT.ZS
> #internet_oil <- wb_data(indicator - "NY.GDP.PETR.RT.ZS", "countries_only")
> #internet <- wb_search(pattern = "population" , fields = "indicator")
> # code for population total is SP.POP.TOTL
> #using wbstats to pull the data in 
> #store the list of indicators in an object 
> indicators <- c("oilrents" = "NY.GDP.PETR.RT.ZS", 
>                 "population" = "SP.POP.TOTL", 
>                 "Total natural resources rents " = "NY.GDP.TOTL.RT.ZS", 
>                 "population density" = "EN.POP.DNST", 
>                 "primary school enrollment" = "SE.PRM.NENR", 
>                 "gdpgrowth" = "NY.GDP.MKTP.KD.ZG")
> #download the data 
> wb_data <- wb_data(indicators ,
>                    mrv = 45) %>%
>   select(!iso2c) %>%
>   rename(year = date)
> rm(indicators)
> 
> #Getting the data ready to merge 
> wb_data <- wb_data %>%
>   filter(year >= 1981 & year <= 2014)
> View(wb_data)
> #A little bit of work needs to be done to get the WBdata ready to merge 
> #Switching iso3c codes to cowcodes 
> library(countrycode)
> ccode_dta <- codelist_panel
> View(ccode_dta)
> ccode_dta <- ccode_dta %>% 
>   filter(year >= 1981 & year <= 2014)
> ccode_dta <- ccode_dta[, c( "year", 
>                             "continent", 
>                             "cown", 
>                             "region",
>                             "country.name.en", 
>                             "iso3c")]
> 
> #adding ccodes to the WB data 
> wb_full_data <- left_join(wb_data, ccode_dta, by = c("year" = "year", 
>                                                      "iso3c" = "iso3c"))
> View(wb_full_data)
> wb_full_data <- wb_full_data %>%
>   rename(ccode = cown)
> rm(wb_data)
> rm(ccode_dta)
> 
> #Getting rid of WB data that isnt for countries 
> library(tidyr)
> wb_full_data <- wb_full_data[!is.na(wb_full_data$ccode), ]
> #removing duplicates 
> wb_full_data <- unique(wb_full_data)
> #there were no duplicates 
> pgms <- unique(pgms)
> 
> 
> 
> # Merging Data ------------------------------------------------------------
> 
> #Putting all of this into one DF 
> merging <- left_join(cow_df, coups, by = c("ccode" = "ccode", 
>                                            "year" = "year"))
> merging <- unique(merging)
> summary(merging)
> merging2 <- left_join(merging, pgms, by = c("gwcode" = "gwno", 
>                                             "year" = "year"))
> summary(merging2)
> unique(merging2)
> 
> 
> #Merge with epr 
> merging2 <- left_join(merging2, 
>                        collapsed_epr,
>                        by = c("gwcode" = "gwid", 
>                               "year" = "year"))
> rm(collapsed_epr)
> 
> #Final merge 
> full_data <- left_join(merging2,
>                        wb_full_data, 
>                        by = c("ccode" = "ccode", 
>                                "year" = "year"))
> full_data <- unique(full_data)
> 
> #removing everything except the final dataframe
> rm(coups, cow_df, merging, merging2, pgms, wb_full_data)
> 
> 
> #Now I need to do a little work on this DF, changing names, fixing variables, etc. 
> 
> # Cleaning Data -----------------------------------------------------------
> 
> #just making sure the temporal bounds are set correctedly 
> full_data <- full_data %>%
>   filter(year >= 1981 & year <= 2014)
> 
> #Some name changes 
> full_data <- full_data %>%
>   rename(gdppc = wbgdppc2011est) %>%
>   rename(state = statenme) %>%
>   rename(polity = polity2) %>%
>   rename(rugged = rugged) %>%
>   rename(maj_power = cowmaj) %>%
>   rename(gpd = wbgdp2011est) %>%
>   rename(pop = wbpopest) %>%
>   rename(mil_expend = milex) %>%
>   rename(cinc = cinc) %>%
>   rename(cw_onset = ucdponset) %>%
>   rename(peace_years = ts_war) %>%
>   rename(coup = coup) %>%
>   rename(pgm_count = presence_count) %>%
>   rename(govt_pgm_count = presence_gov_formed_count) %>%
>   rename(notgovt_pgm_count = presence_notgov_formed_count) %>%
>   rename(pgm_presence = presence) %>%
>   rename(govt_pgm_presence = presence_govformed) %>%
>   rename(not_govt_pgm_presence = presence_notgovformed) %>%
>   rename(ongoing_conflict = ucdpongoing) %>% 
>   rename(natl_rents = `Total natural resources rents `) %>%
>   rename(school_rates = `primary school enrollment`) %>%
>   rename(population_density = `population density`)
> 
> #Now I need to fix up the coup variable#first thing I need to do is change all the NAs to 0
> full_data <- full_data %>% 
>   mutate(coup = ifelse(is.na(coup), 0, coup))
> 
> #time since variable coup
> #library(stevemisc)
> #fulldata <- ps_btscs(full_data, coup, year, ccode)
> #summary(fulldata$spell)
> #fulldata <- fulldata %>%
>  # rename(ts_coup = spell)
> 
> 
> #coldwar dummy
> full_data$post_cw <- ifelse(full_data$year > 1991, 1, 0)
> 
> #I need to create logs and lags of certain variables 
> 
> # Define a small constant value - need to create logs for some variables with 0's
> epsilon <- 1e-6  # You can adjust this value as needed
> # Create log_rents with handling for zero values
> full_data$log_rents <- log(full_data$natl_rents + epsilon)
> full_data$log_milex <- log(full_data$mil_expend + epsilon)
> 
> #logs
> full_data <- full_data |>
>   mutate(log_gdp = log(gpd)) |>
>   mutate(log_pop = log(population)) |>
>   mutate(log_vdem = log(v2x_polyarchy)) |>
>   mutate(polity_sq = polity^2) |>
>   mutate(peace_years_sq = peace_years^2) |>
>   mutate(peace_years_cb = peace_years^3) |>
>   mutate(Vdem_sq = v2x_polyarchy^2) |>
>   mutate(log_rugged = log(rugged)) 
> 
> 
> 
> 
> #lags
> full_data <- full_data %>%
>   group_by(ccode) %>%
>   mutate(lag_presence = lag(pgm_presence, 1)) %>%
>   mutate(lag_count = lag(pgm_count, 1)) %>%
>   mutate(lag_govt_pgm = lag(govt_pgm_presence, 1)) %>%
>   mutate(lag_not_govt_pgm = lag(not_govt_pgm_presence, 1)) %>%  
>   mutate(lag_govt_pgm_count = lag(govt_pgm_count, 1)) %>%
>   mutate(lag_not_govt_pgm_count = lag(notgovt_pgm_count, 1)) 
> 
> #making the controls make sense 
> full_data <- full_data |>
>   mutate(mil_ex_2 = mil_expend/10000000)
> 
> 
> 
> 
> #making sure there are no duplicates
> duplicate_rows <- duplicated(full_data[c("ccode", "year")])
> # Remove duplicate observations and overwrite the original dataframe
> full_data <- full_data[!duplicate_rows, ]
> #there were none, so all good 
>   
> #saving the dataframe 
> save(full_data,
>      file = "/Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics795_cleaned_data.Rdata")
> 
> # Saving the Data ---------------------------------------------------------
> 
> 
> #install.packages("foreign")
> library(foreign)
> write.dta(full_data, "pgm_cw_full.dta")
> 
> #Save as a csv
> library(readr)
> write_csv(full_data,
>           "/Users/benjaminleo/Downloads/PGMs_and_CW_onset_Empirics795_cleaned_data.csv")
> 
> 
> # Graphics  ---------------------------------------------------------------
> 
> 
> 
> #Making a PGM Map for the intro section - before I start my analyses I want to make a nice map showing the number of PGMs in each country 
> #install.packages("dataverse")
> library(dataverse)
> #install.packages("bit")
> library(bit)
> Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
> pgm_group_df <- get_dataframe_by_name(
>   filename = "pgmdv2_group.tab",
>   dataset = "doi:10.7910/DVN/YK8L4I",
>   server = "dataverse.harvard.edu"
> )
> View(pgm_group_df)
> 
> # Assuming pgm_group_df is your dataframe
> pgm_group_df <- pgm_group_df %>%
>   group_by(gwno, country) %>%
>   summarise(total_groups = n())
> 
> #making a blank world map 
> # Load necessary libraries
> library(maps)
> library(countrycode)
> # Get world map data
> world_map <- map_data("world")
> 
> 
> #Adding gwno to world map data 
> world_map$gwno <- countrycode(world_map$region, origin = "country.name", destination = "gwn")
> #merging
> merged_df <- left_join(world_map, pgm_group_df, by = "gwno")
> 
> #plot
> ggplot() +
>   geom_polygon(data = merged_df, aes(x = long, y = lat, 
>                                      group = group, 
>                                      fill = total_groups),
>                                     color = "black") +
>   scale_fill_gradient(name = "Total Groups", 
>                       low = "white", 
>                       high = "black", 
>                       na.value = "white") +
>   coord_fixed() +
>   theme_void() + 
>   labs(
>     title = "Global Distribution of PGMs (1981-2014)"
>   )
> 
> rm(merged_df, world_map, pgm_group_df)
> 
> 
> #Making a map of cw onset
> # Aggregate to find the sum of 'cw_onset' by 'ccode'
> collapsed_data <- aggregate(cw_onset ~ gwcode,
>                             data = full_data, sum)
> # Rename the aggregated column for clarity
> names(collapsed_data)[2] <- "sum_cw_onset"
> # Print the resulting dataframe
> View(collapsed_data)
> #making a blank world map 
> # Load necessary libraries
> library(maps)
> library(countrycode)
> # Get world map data
> world_map <- map_data("world")
> 
> #Adding gwno to world map data 
> world_map$gwno <- countrycode(world_map$region, origin = "country.name", destination = "gwn")
> #merging
> merged_df <- left_join(world_map, collapsed_data, by = c("gwno" = "gwcode"))
> 
> #plot
> ggplot() +
>   geom_polygon(data = merged_df, aes(x = long, 
>                                      y = lat, 
>                                      group = group, 
>                                      fill = sum_cw_onset), 
>                                     color = "black") +
>   scale_fill_gradient(name = "Number of Civil Conflicts", 
>                       low = "white", 
>                       high = "black", 
>                       na.value = "white") +
>   coord_fixed() +
>   theme_void() + 
>   labs(
>     title = "Global Distribution of Civil Conflicts (1981-2014)"
>   )
> 
> 
> #conflict over time plot 
> 
> #count df
> collapsed_conflict <- full_data %>%
>   group_by(year) %>%
>   summarise(cc_onset_count = sum(cw_onset, 
>                                      na.rm = TRUE), 
>             .groups = 'drop')
> 
> #Creating the plot 
> ggplot(collapsed_conflict, aes(x = year, y = cc_onset_count)) +
>   geom_line() +
>   labs(x = "Year", y = "Number of New Civil Conflicts") +
>   ggtitle("Civil Conflict Onset by Year (1981-2014)") +
>   scale_y_continuous(breaks = seq(0, max(collapsed_conflict$cc_onset_count), by = 1)) +
>   theme_bw() +
>   theme(panel.grid.major = element_blank(),
>         panel.grid.minor = element_blank())
> 
> #PGMs over time plot 
> 
> #count df
> collapsed_PGMs <- full_data %>%
>   group_by(year) %>%
>   summarise(pgm_count = sum(pgm_count, 
>                                  na.rm = TRUE), 
>             .groups = 'drop')
> 
> #Creating the plot 
> ggplot(collapsed_PGMs, aes(x = year, y = pgm_count)) +
>   geom_line() +
>   labs(x = "", y = "Number of Active PGMs") +
>   ggtitle("PGM Presence by Year (1981-2014)") +
>   scale_y_continuous(breaks = seq(0, max(collapsed_conflict$cc_onset_count), by = 1)) +
>   theme_bw() +
>   theme(panel.grid.major = element_blank(),
>         panel.grid.minor = element_blank())
> 
> #States with a PGM active by year  
> 
> #count df
> collapsed_PGMs <- full_data %>%
>   group_by(year) %>%
>   summarise(pgm_presence = sum(pgm_presence, 
>                             na.rm = TRUE), 
>             .groups = 'drop')
> 
> #Creating the plot 
> ggplot(collapsed_PGMs, aes(x = year, y = pgm_presence)) +
>   geom_line() +
>   labs(x = "", y = "Number of States with a PGM") +
>   ggtitle("Number of States With a PGM (1981-2014)") +
>   scale_y_continuous(breaks = seq(0, max(collapsed_PGMs$year), by = 5)) +
>   theme_bw() +
>   theme(panel.grid.major = element_blank(),
>         panel.grid.minor = element_blank())

Warning: Some values were not matched unambiguously: American Samoa, Andorra, Anguilla, Antarctica, Antigua, Aruba, Ascension Island, Azores, Barbuda, Bermuda, Bonaire, Canary Islands, Cayman Islands, Chagos Archipelago, Christmas Island, Cocos Islands, Cook Islands, Curacao, Dominica, Falkland Islands, Faroe Islands, French Guiana, French Polynesia, French Southern and Antarctic Lands, Greenland, Grenada, Grenadines, Guadeloupe, Guam, Guernsey, Heard Island, Isle of Man, Jersey, Kiribati, Liechtenstein, Madeira Islands, Marshall Islands, Martinique, Mayotte, Micronesia, Monaco, Montserrat, Nauru, Nevis, New Caledonia, Niue, Norfolk Island, Northern Mariana Islands, Palau, Palestine, Pitcairn Islands, Puerto Rico, Reunion, Saba, Saint Barthelemy, Saint Helena, Saint Kitts, Saint Lucia, Saint Martin, Saint Pierre and Miquelon, Saint Vincent, Samoa, San Marino, Sao Tome and Principe, Seychelles, Siachen Glacier, Sint Eustatius, Sint Maarten, South Georgia, South Sandwich Islands, Tonga, Turks and Caicos Islands, Vanuatu, Virgin Islands, Wallis and Futuna, Western Sahara, Yemen 

NOTE: Log Print Time:  2025-01-15 10:14:37.958381 
NOTE: Elapsed Time: 23.7016859054565 secs 

Warning: Some values were not matched unambiguously: American Samoa, Andorra, Anguilla, Antarctica, Antigua, Aruba, Ascension Island, Azores, Barbuda, Bermuda, Bonaire, Canary Islands, Cayman Islands, Chagos Archipelago, Christmas Island, Cocos Islands, Cook Islands, Curacao, Dominica, Falkland Islands, Faroe Islands, French Guiana, French Polynesia, French Southern and Antarctic Lands, Greenland, Grenada, Grenadines, Guadeloupe, Guam, Guernsey, Heard Island, Isle of Man, Jersey, Kiribati, Liechtenstein, Madeira Islands, Marshall Islands, Martinique, Mayotte, Micronesia, Monaco, Montserrat, Nauru, Nevis, New Caledonia, Niue, Norfolk Island, Northern Mariana Islands, Palau, Palestine, Pitcairn Islands, Puerto Rico, Reunion, Saba, Saint Barthelemy, Saint Helena, Saint Kitts, Saint Lucia, Saint Martin, Saint Pierre and Miquelon, Saint Vincent, Samoa, San Marino, Sao Tome and Principe, Seychelles, Siachen Glacier, Sint Eustatius, Sint Maarten, South Georgia, South Sandwich Islands, Tonga, Turks and Caicos Islands, Vanuatu, Virgin Islands, Wallis and Futuna, Western Sahara, Yemen 

NOTE: Log Print Time:  2025-01-15 10:14:39.623313 
NOTE: Elapsed Time: 1.66493201255798 secs 

========================================================================= 
Log End Time: 2025-01-15 10:14:56.69737 
Log Elapsed Time: 0 00:00:42 
========================================================================= 
